## Replication Code for
## "Education and Social Capital"
## Apfeld, Coman, Gerring, and Jessee
## Journal of Experimental Political Science

## NOTE: This code requires the 
## following R packages to be installed 
## (version number used for paper in parentheses):
##     dplyr (1.0.0)
##     tidyr (1.1.0)
##     magrittr (1.5)
##     ggplot2 (3.3.2)
##     ggpubr (0.4.0)
##     rddensity (2.1)
## users should be sure to install all of these 
## packages BEFORE running the code below 

# Load packages and data
library(dplyr)
library(tidyr)
library(magrittr)
library(ggplot2)
load("bac_scores.Rdata")


##### ##### ##### #####
## Figure 1           #
##### ##### ##### #####
# Figure 1 left plot: 2004-2014
fig_1a <- bac %>%
  filter(lowest_score >= 5 & year < 2015) %>%
  ggplot(aes(final_grade_calculated)) +
  geom_histogram(breaks = c(5, seq(from = 5.03, to = 10, by = 0.05), 10)) + 
  geom_vline(xintercept = 5.99,
             color = "red",
             linetype = 4) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0), breaks = c(0, 2.5, 5, 7.5, 10)) + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(color = "black"),
        plot.margin = unit(c(1, 1, 1, 1), "cm")) + 
  labs(title = "2004-2014",
       y = "Number of Students",
       x = "Final Grade")

# Figure 1 right plot: 2015-2019
fig_1b <- bac %>%
  filter(lowest_score >= 5 & year >= 2015) %>%
  ggplot(aes(final_grade_calculated)) +
  geom_histogram(breaks = c(5, seq(from = 5.03, to = 10, by = 0.05), 10)) + 
  geom_vline(xintercept = 5.99,
             color = "red",
             linetype = 4) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0), breaks = c(0, 2.5, 5, 7.5, 10)) + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(color = "black"),
        plot.margin = unit(c(1, 1, 1, 1), "cm")) + 
  labs(title = "2015-2019", 
       y = "Number of Students",
       x = "Final Grade")

# Arrange both in a single figure
two_up <- ggpubr::ggarrange(fig_1a, fig_1b, ncol = 2)
print(two_up)


##### ##### ##### #####
## Figure 2           #
##### ##### ##### #####
# Load customized version of rdplotdensity function
# Note: function has been modified to plot two estimates at the same time
#       and to return a ggplot object instead to facilitate combining
#       two plots into a single figure. Underlying calculations
#       are otherwise unchanged from those provided by
#       rddensity::rdplotdensity.
source("rdplotdensity_two_lines.R")
bac %<>%
  filter(lowest_score >= 5 & !is.na(final_grade_calculated))

# Break data into two parts based on years
fig_2a_dat <- bac %>%
  filter(year %in% c(2004:2014))
fig_2b_dat <- bac %>%
  filter(year %in% c(2015:2019))



# Calculate RD density values for two datasets using RD cutoff
fig_2a_rd_test <- rddensity::rddensity(fig_2a_dat$final_grade_calculated,
                                       c = 5.99)
fig_2b_rd_test <- rddensity::rddensity(fig_2b_dat$final_grade_calculated,
                                       c = 5.99)

# Plot RD density results using customized function
fig_2a <- rdplotdensity_two_lines(fig_2a_rd_test,
                                  fig_2a_dat$final_grade_calculated,
                                  xlabel = "BAC Score",
                                  title = "2004-2014")

fig_2b <- rdplotdensity_two_lines(fig_2b_rd_test,
                                  fig_2b_dat$final_grade_calculated,
                                  xlabel = "BAC Score",
                                  title = "2015-2019")

# Arrange two plots into a single figure
my_grid <- ggpubr::ggarrange(fig_2a, fig_2b, ncol = 2)
print(my_grid)
